Assignment 1

Author

Kyra Guy

1.

# Attach data.table; note that the two loads below use base read.csv(),
# so both objects are plain data.frames.
library(data.table)

# Daily PM2.5 records, one CSV file per year, read from the user's Desktop.
data2002 <- read.csv(file = "~/Desktop/2002_PM2.5.csv")
data2022 <- read.csv(file = "~/Desktop/2022_PM2.5.csv")

# Number of rows and columns in the 2002 table.
c(nrow(data2002), ncol(data2002))
[1] 15976    20
# Number of rows and columns in the 2022 table.
c(nrow(data2022), ncol(data2022))
[1] 57761    20
# Show the first six rows of the 2002 table.
head(data2002, n = 6L)
        Date Source  Site.ID POC Daily.Mean.PM2.5.Concentration    UNITS
1 01/05/2002    AQS 60010007   1                           25.1 ug/m3 LC
2 01/06/2002    AQS 60010007   1                           31.6 ug/m3 LC
3 01/08/2002    AQS 60010007   1                           21.4 ug/m3 LC
4 01/11/2002    AQS 60010007   1                           25.9 ug/m3 LC
5 01/14/2002    AQS 60010007   1                           34.5 ug/m3 LC
6 01/17/2002    AQS 60010007   1                           41.0 ug/m3 LC
  DAILY_AQI_VALUE Site.Name DAILY_OBS_COUNT PERCENT_COMPLETE AQS_PARAMETER_CODE
1              78 Livermore               1              100              88101
2              92 Livermore               1              100              88101
3              71 Livermore               1              100              88101
4              80 Livermore               1              100              88101
5              98 Livermore               1              100              88101
6             115 Livermore               1              100              88101
        AQS_PARAMETER_DESC CBSA_CODE                         CBSA_NAME
1 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
2 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
3 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
4 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
5 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
6 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
  STATE_CODE      STATE COUNTY_CODE  COUNTY SITE_LATITUDE SITE_LONGITUDE
1          6 California           1 Alameda      37.68753      -121.7842
2          6 California           1 Alameda      37.68753      -121.7842
3          6 California           1 Alameda      37.68753      -121.7842
4          6 California           1 Alameda      37.68753      -121.7842
5          6 California           1 Alameda      37.68753      -121.7842
6          6 California           1 Alameda      37.68753      -121.7842
# Show the first six rows of the 2022 table.
head(data2022, n = 6L)
        Date Source  Site.ID POC Daily.Mean.PM2.5.Concentration    UNITS
1 01/01/2022    AQS 60010007   3                           12.7 ug/m3 LC
2 01/02/2022    AQS 60010007   3                           13.9 ug/m3 LC
3 01/03/2022    AQS 60010007   3                            7.1 ug/m3 LC
4 01/04/2022    AQS 60010007   3                            3.7 ug/m3 LC
5 01/05/2022    AQS 60010007   3                            4.2 ug/m3 LC
6 01/06/2022    AQS 60010007   3                            3.8 ug/m3 LC
  DAILY_AQI_VALUE Site.Name DAILY_OBS_COUNT PERCENT_COMPLETE AQS_PARAMETER_CODE
1              52 Livermore               1              100              88101
2              55 Livermore               1              100              88101
3              30 Livermore               1              100              88101
4              15 Livermore               1              100              88101
5              18 Livermore               1              100              88101
6              16 Livermore               1              100              88101
        AQS_PARAMETER_DESC CBSA_CODE                         CBSA_NAME
1 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
2 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
3 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
4 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
5 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
6 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
  STATE_CODE      STATE COUNTY_CODE  COUNTY SITE_LATITUDE SITE_LONGITUDE
1          6 California           1 Alameda      37.68753      -121.7842
2          6 California           1 Alameda      37.68753      -121.7842
3          6 California           1 Alameda      37.68753      -121.7842
4          6 California           1 Alameda      37.68753      -121.7842
5          6 California           1 Alameda      37.68753      -121.7842
6          6 California           1 Alameda      37.68753      -121.7842
# Show the last six rows of the 2002 table.
tail(data2002, n = 6L)
            Date Source  Site.ID POC Daily.Mean.PM2.5.Concentration    UNITS
15971 12/10/2002    AQS 61131003   1                             15 ug/m3 LC
15972 12/13/2002    AQS 61131003   1                             15 ug/m3 LC
15973 12/22/2002    AQS 61131003   1                              1 ug/m3 LC
15974 12/25/2002    AQS 61131003   1                             23 ug/m3 LC
15975 12/28/2002    AQS 61131003   1                              5 ug/m3 LC
15976 12/31/2002    AQS 61131003   1                              6 ug/m3 LC
      DAILY_AQI_VALUE            Site.Name DAILY_OBS_COUNT PERCENT_COMPLETE
15971              57 Woodland-Gibson Road               1              100
15972              57 Woodland-Gibson Road               1              100
15973               4 Woodland-Gibson Road               1              100
15974              74 Woodland-Gibson Road               1              100
15975              21 Woodland-Gibson Road               1              100
15976              25 Woodland-Gibson Road               1              100
      AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
15971              88101 PM2.5 - Local Conditions     40900
15972              88101 PM2.5 - Local Conditions     40900
15973              88101 PM2.5 - Local Conditions     40900
15974              88101 PM2.5 - Local Conditions     40900
15975              88101 PM2.5 - Local Conditions     40900
15976              88101 PM2.5 - Local Conditions     40900
                                    CBSA_NAME STATE_CODE      STATE COUNTY_CODE
15971 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
15972 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
15973 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
15974 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
15975 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
15976 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
      COUNTY SITE_LATITUDE SITE_LONGITUDE
15971   Yolo      38.66121      -121.7327
15972   Yolo      38.66121      -121.7327
15973   Yolo      38.66121      -121.7327
15974   Yolo      38.66121      -121.7327
15975   Yolo      38.66121      -121.7327
15976   Yolo      38.66121      -121.7327
# Show the last six rows of the 2022 table.
tail(data2022, n = 6L)
            Date Source  Site.ID POC Daily.Mean.PM2.5.Concentration    UNITS
57756 12/01/2022    AQS 61131003   1                            3.4 ug/m3 LC
57757 12/07/2022    AQS 61131003   1                            3.8 ug/m3 LC
57758 12/13/2022    AQS 61131003   1                            6.0 ug/m3 LC
57759 12/19/2022    AQS 61131003   1                           34.8 ug/m3 LC
57760 12/25/2022    AQS 61131003   1                           23.2 ug/m3 LC
57761 12/31/2022    AQS 61131003   1                            1.0 ug/m3 LC
      DAILY_AQI_VALUE            Site.Name DAILY_OBS_COUNT PERCENT_COMPLETE
57756              14 Woodland-Gibson Road               1              100
57757              16 Woodland-Gibson Road               1              100
57758              25 Woodland-Gibson Road               1              100
57759              99 Woodland-Gibson Road               1              100
57760              74 Woodland-Gibson Road               1              100
57761               4 Woodland-Gibson Road               1              100
      AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
57756              88101 PM2.5 - Local Conditions     40900
57757              88101 PM2.5 - Local Conditions     40900
57758              88101 PM2.5 - Local Conditions     40900
57759              88101 PM2.5 - Local Conditions     40900
57760              88101 PM2.5 - Local Conditions     40900
57761              88101 PM2.5 - Local Conditions     40900
                                    CBSA_NAME STATE_CODE      STATE COUNTY_CODE
57756 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
57757 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
57758 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
57759 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
57760 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
57761 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
      COUNTY SITE_LATITUDE SITE_LONGITUDE
57756   Yolo      38.66121      -121.7327
57757   Yolo      38.66121      -121.7327
57758   Yolo      38.66121      -121.7327
57759   Yolo      38.66121      -121.7327
57760   Yolo      38.66121      -121.7327
57761   Yolo      38.66121      -121.7327
# List the column names of the 2002 table.
names(data2002)
 [1] "Date"                           "Source"                        
 [3] "Site.ID"                        "POC"                           
 [5] "Daily.Mean.PM2.5.Concentration" "UNITS"                         
 [7] "DAILY_AQI_VALUE"                "Site.Name"                     
 [9] "DAILY_OBS_COUNT"                "PERCENT_COMPLETE"              
[11] "AQS_PARAMETER_CODE"             "AQS_PARAMETER_DESC"            
[13] "CBSA_CODE"                      "CBSA_NAME"                     
[15] "STATE_CODE"                     "STATE"                         
[17] "COUNTY_CODE"                    "COUNTY"                        
[19] "SITE_LATITUDE"                  "SITE_LONGITUDE"                
# List the column names of the 2022 table.
names(data2022)
 [1] "Date"                           "Source"                        
 [3] "Site.ID"                        "POC"                           
 [5] "Daily.Mean.PM2.5.Concentration" "UNITS"                         
 [7] "DAILY_AQI_VALUE"                "Site.Name"                     
 [9] "DAILY_OBS_COUNT"                "PERCENT_COMPLETE"              
[11] "AQS_PARAMETER_CODE"             "AQS_PARAMETER_DESC"            
[13] "CBSA_CODE"                      "CBSA_NAME"                     
[15] "STATE_CODE"                     "STATE"                         
[17] "COUNTY_CODE"                    "COUNTY"                        
[19] "SITE_LATITUDE"                  "SITE_LONGITUDE"                
# Compact overview of each 2002 column: storage type and leading values.
str(object = data2002)
'data.frame':   15976 obs. of  20 variables:
 $ Date                          : chr  "01/05/2002" "01/06/2002" "01/08/2002" "01/11/2002" ...
 $ Source                        : chr  "AQS" "AQS" "AQS" "AQS" ...
 $ Site.ID                       : int  60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
 $ POC                           : int  1 1 1 1 1 1 1 1 1 1 ...
 $ Daily.Mean.PM2.5.Concentration: num  25.1 31.6 21.4 25.9 34.5 41 29.3 15 18.8 37.9 ...
 $ UNITS                         : chr  "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
 $ DAILY_AQI_VALUE               : int  78 92 71 80 98 115 87 57 65 107 ...
 $ Site.Name                     : chr  "Livermore" "Livermore" "Livermore" "Livermore" ...
 $ DAILY_OBS_COUNT               : int  1 1 1 1 1 1 1 1 1 1 ...
 $ PERCENT_COMPLETE              : num  100 100 100 100 100 100 100 100 100 100 ...
 $ AQS_PARAMETER_CODE            : int  88101 88101 88101 88101 88101 88101 88101 88101 88101 88101 ...
 $ AQS_PARAMETER_DESC            : chr  "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" ...
 $ CBSA_CODE                     : int  41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
 $ CBSA_NAME                     : chr  "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
 $ STATE_CODE                    : int  6 6 6 6 6 6 6 6 6 6 ...
 $ STATE                         : chr  "California" "California" "California" "California" ...
 $ COUNTY_CODE                   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ COUNTY                        : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
 $ SITE_LATITUDE                 : num  37.7 37.7 37.7 37.7 37.7 ...
 $ SITE_LONGITUDE                : num  -122 -122 -122 -122 -122 ...
# Compact overview of each 2022 column: storage type and leading values.
str(object = data2022)
'data.frame':   57761 obs. of  20 variables:
 $ Date                          : chr  "01/01/2022" "01/02/2022" "01/03/2022" "01/04/2022" ...
 $ Source                        : chr  "AQS" "AQS" "AQS" "AQS" ...
 $ Site.ID                       : int  60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
 $ POC                           : int  3 3 3 3 3 3 3 3 3 3 ...
 $ Daily.Mean.PM2.5.Concentration: num  12.7 13.9 7.1 3.7 4.2 3.8 2.3 6.9 13.6 11.2 ...
 $ UNITS                         : chr  "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
 $ DAILY_AQI_VALUE               : int  52 55 30 15 18 16 10 29 54 47 ...
 $ Site.Name                     : chr  "Livermore" "Livermore" "Livermore" "Livermore" ...
 $ DAILY_OBS_COUNT               : int  1 1 1 1 1 1 1 1 1 1 ...
 $ PERCENT_COMPLETE              : num  100 100 100 100 100 100 100 100 100 100 ...
 $ AQS_PARAMETER_CODE            : int  88101 88101 88101 88101 88101 88101 88101 88101 88101 88101 ...
 $ AQS_PARAMETER_DESC            : chr  "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" ...
 $ CBSA_CODE                     : int  41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
 $ CBSA_NAME                     : chr  "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
 $ STATE_CODE                    : int  6 6 6 6 6 6 6 6 6 6 ...
 $ STATE                         : chr  "California" "California" "California" "California" ...
 $ COUNTY_CODE                   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ COUNTY                        : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
 $ SITE_LATITUDE                 : num  37.7 37.7 37.7 37.7 37.7 ...
 $ SITE_LONGITUDE                : num  -122 -122 -122 -122 -122 ...
# Per-column summary of the 2002 table (quantiles for numeric columns,
# length/class/mode for character columns, NA counts where present).
summary(object = data2002)
     Date              Source             Site.ID              POC       
 Length:15976       Length:15976       Min.   :60010007   Min.   :1.000  
 Class :character   Class :character   1st Qu.:60290014   1st Qu.:1.000  
 Mode  :character   Mode  :character   Median :60590007   Median :1.000  
                                       Mean   :60549600   Mean   :1.581  
                                       3rd Qu.:60731002   3rd Qu.:1.000  
                                       Max.   :61131003   Max.   :6.000  
                                                                         
 Daily.Mean.PM2.5.Concentration    UNITS           DAILY_AQI_VALUE 
 Min.   :  0.00                 Length:15976       Min.   :  0.00  
 1st Qu.:  7.00                 Class :character   1st Qu.: 29.00  
 Median : 12.00                 Mode  :character   Median : 50.00  
 Mean   : 16.12                                    Mean   : 53.68  
 3rd Qu.: 20.50                                    3rd Qu.: 69.00  
 Max.   :104.30                                    Max.   :176.00  
                                                                   
  Site.Name         DAILY_OBS_COUNT PERCENT_COMPLETE AQS_PARAMETER_CODE
 Length:15976       Min.   :1       Min.   :100      Min.   :88101     
 Class :character   1st Qu.:1       1st Qu.:100      1st Qu.:88101     
 Mode  :character   Median :1       Median :100      Median :88101     
                    Mean   :1       Mean   :100      Mean   :88215     
                    3rd Qu.:1       3rd Qu.:100      3rd Qu.:88502     
                    Max.   :1       Max.   :100      Max.   :88502     
                                                                       
 AQS_PARAMETER_DESC   CBSA_CODE      CBSA_NAME           STATE_CODE
 Length:15976       Min.   :12540   Length:15976       Min.   :6   
 Class :character   1st Qu.:23420   Class :character   1st Qu.:6   
 Mode  :character   Median :40140   Mode  :character   Median :6   
                    Mean   :33270                      Mean   :6   
                    3rd Qu.:41740                      3rd Qu.:6   
                    Max.   :49700                      Max.   :6   
                    NA's   :929                                    
    STATE            COUNTY_CODE        COUNTY          SITE_LATITUDE  
 Length:15976       Min.   :  1.00   Length:15976       Min.   :32.63  
 Class :character   1st Qu.: 29.00   Class :character   1st Qu.:34.07  
 Mode  :character   Median : 59.00   Mode  :character   Median :35.36  
                    Mean   : 54.78                      Mean   :36.00  
                    3rd Qu.: 73.00                      3rd Qu.:37.77  
                    Max.   :113.00                      Max.   :41.71  
                                                                       
 SITE_LONGITUDE  
 Min.   :-124.2  
 1st Qu.:-121.4  
 Median :-119.1  
 Mean   :-119.4  
 3rd Qu.:-117.9  
 Max.   :-115.5  
                 
# Per-column summary of the 2022 table (quantiles for numeric columns,
# length/class/mode for character columns, NA counts where present).
summary(object = data2022)
     Date              Source             Site.ID              POC        
 Length:57761       Length:57761       Min.   :60010007   Min.   : 1.000  
 Class :character   Class :character   1st Qu.:60311004   1st Qu.: 1.000  
 Mode  :character   Mode  :character   Median :60631007   Median : 3.000  
                                       Mean   :60571692   Mean   : 2.531  
                                       3rd Qu.:60771003   3rd Qu.: 3.000  
                                       Max.   :61131003   Max.   :21.000  
                                                                          
 Daily.Mean.PM2.5.Concentration    UNITS           DAILY_AQI_VALUE 
 Min.   : -2.200                Length:57761       Min.   :  0.00  
 1st Qu.:  4.200                Class :character   1st Qu.: 18.00  
 Median :  7.000                Mode  :character   Median : 29.00  
 Mean   :  8.564                                   Mean   : 32.94  
 3rd Qu.: 10.900                                   3rd Qu.: 45.00  
 Max.   :302.500                                   Max.   :353.00  
                                                                   
  Site.Name         DAILY_OBS_COUNT PERCENT_COMPLETE AQS_PARAMETER_CODE
 Length:57761       Min.   :1       Min.   :100      Min.   :88101     
 Class :character   1st Qu.:1       1st Qu.:100      1st Qu.:88101     
 Mode  :character   Median :1       Median :100      Median :88101     
                    Mean   :1       Mean   :100      Mean   :88196     
                    3rd Qu.:1       3rd Qu.:100      3rd Qu.:88101     
                    Max.   :1       Max.   :100      Max.   :88502     
                                                                       
 AQS_PARAMETER_DESC   CBSA_CODE      CBSA_NAME           STATE_CODE
 Length:57761       Min.   :12540   Length:57761       Min.   :6   
 Class :character   1st Qu.:31080   Class :character   1st Qu.:6   
 Mode  :character   Median :40140   Mode  :character   Median :6   
                    Mean   :35445                      Mean   :6   
                    3rd Qu.:41860                      3rd Qu.:6   
                    Max.   :49700                      Max.   :6   
                    NA's   :4761                                   
    STATE            COUNTY_CODE        COUNTY          SITE_LATITUDE  
 Length:57761       Min.   :  1.00   Length:57761       Min.   :32.58  
 Class :character   1st Qu.: 31.00   Class :character   1st Qu.:34.14  
 Mode  :character   Median : 63.00   Mode  :character   Median :36.60  
                    Mean   : 57.02                      Mean   :36.37  
                    3rd Qu.: 77.00                      3rd Qu.:38.10  
                    Max.   :113.00                      Max.   :41.76  
                                                                       
 SITE_LONGITUDE  
 Min.   :-124.2  
 1st Qu.:-121.5  
 Median :-119.8  
 Mean   :-119.7  
 3rd Qu.:-118.1  
 Max.   :-115.5  
                 
# TRUE when at least one cell of the 2002 table is NA.
anyNA(data2002)
[1] TRUE
# TRUE when at least one cell of the 2022 table is NA.
anyNA(data2022)
[1] TRUE
# Flag, per column, whether the 2002 table contains any NA.
# vapply() inspects each column in its native type and always returns a
# named logical vector; apply() would first coerce the whole data frame
# to a character matrix just to run the same check.
col_has_missing <- vapply(data2002, anyNA, logical(1))
# Names of the 2002 columns that contain at least one missing value.
colnames(data2002)[col_has_missing]
[1] "CBSA_CODE"
# Flag, per column, whether the 2022 table contains any NA.
# vapply() inspects each column in its native type and always returns a
# named logical vector; apply() would first coerce the whole data frame
# to a character matrix just to run the same check.
col_has_missing <- vapply(data2022, anyNA, logical(1))
# Names of the 2022 columns that contain at least one missing value.
colnames(data2022)[col_has_missing]
[1] "CBSA_CODE"
# Count the 2002 records per monitoring-site name. [[ ]] extracts the
# column by its exact name (no partial matching).
table(data2002[["Site.Name"]])

                                                  
                                              230 
                          3425 N FIRST ST, FRESNO 
                                              499 
                               Alturas-Fourth St. 
                                                2 
                                          Anaheim 
                                              351 
                            Aqua Tibia Wilderness 
                                              104 
                       Atascadero (original site) 
                                              116 
                                            Azusa 
                                              339 
                      Bakersfield-Airport (Planz) 
                                              117 
                           Bakersfield-California 
                                             1295 
                        Bakersfield-Golden / M St 
                                               91 
                                         Big Bear 
                                               56 
                                         Bliss SP 
                                              113 
                             Brawley-401 Main St. 
                                               85 
                                          Burbank 
                                              122 
                            Calexico-Ethel Street 
                                              165 
                             Chico-Manzanita Ave. 
                                              120 
                                      Chula Vista 
                                              116 
                                     Clovis-Villa 
                                               86 
                              Colusa-Sunrise Blvd 
                                               95 
                                          Concord 
                                              276 
                               Corcoran-Patterson 
                                               83 
                   Death Valley NP - Park Village 
                                              115 
                                      Echo Summit 
                                                2 
                                         El Cajon 
                                              416 
                             El Centro-9th Street 
                                               92 
                        El Rio-Rio Mesa School #2 
                                              114 
                                        Escondido 
                                              355 
                                  Eureka I Street 
                                               59 
                                          Fontana 
                                              118 
                             Fremont - Chapel Way 
                                              105 
                                   Fresno-Pacific 
                                               91 
                     Grass Valley-Litton Building 
                                               50 
                                Hoover Wilderness 
                                              111 
                                            Indio 
                                              117 
                      Joshua Tree NP - Black Rock 
                                               96 
                                Kaiser Wilderness 
                                               84 
                                      Kearny Mesa 
                                              111 
                                           Keeler 
                                              162 
                          Lakeport-Lakeport Blvd. 
                                               61 
                        Lancaster-Division Street 
                                              107 
 Lassen Volcanic NP - Manzanita Lake Fire Station 
                                              104 
                      Lava Beds National Monument 
                                              104 
                                            Lebec 
                                              109 
                          Lebec-Peace Valley Road 
                                               93 
                                        Livermore 
                                               96 
                               Long Beach (North) 
                                              356 
                    Los Angeles-North Main Street 
                                              365 
                                          Lynwood 
                                              122 
                                      Merced-M St 
                                               89 
                                    Mission Viejo 
                                              119 
                              Modesto-14th Street 
                                              183 
                                           Mojave 
                                              100 
                             Ontario Fire Station 
                                              111 
                                     Palm Springs 
                                              119 
                                         Pasadena 
                                              121 
Pinnacles NP - Southwest of East Entrance Station 
                                              119 
                                   Piru - Pacific 
                                              117 
                    Point Reyes NS Ranger Station 
                                               97 
                        Portola-161 Nevada Street 
                                              100 
                           Quincy-N Church Street 
                                               77 
                                Redding - Buckeye 
                                               54 
                      Redding - Health Department 
                                               54 
                                  Redding - Toyon 
                                               59 
                                     Redwood City 
                                              100 
                                       Redwood NP 
                                              110 
                                           Reseda 
                                              120 
                        Ridgecrest-California Ave 
                                              104 
                             Riverside (Magnolia) 
                                              115 
                          Roseville-N Sunrise Ave 
                                               60 
                                         Rubidoux 
                                              562 
      Sacramento Health Department-Stockton Blvd. 
                                              154 
                         Sacramento-1309 T Street 
                                              380 
                        Sacramento-Del Paso Manor 
                                              285 
                                        Salinas 3 
                                              120 
                     San Andreas-Gold Strike Road 
                                               60 
                                   San Bernardino 
                                              117 
                               San Diego-12th Ave 
                                              352 
                                    San Francisco 
                                              196 
                                         San Jose 
                                              217 
                               San Jose - 4th St. 
                                              141 
                               San Jose - Jackson 
                                              101 
                        San Luis Obispo-Marsh St. 
                                               52 
                            San Rafael Wilderness 
                                               92 
                                       Santa Cruz 
                                               61 
                                      Santa Maria 
                                               60 
                              Santa Rosa - 5th St 
                                               93 
                          Sequoia NP-Ash Mountain 
                                              104 
                       Simi Valley-Cochran Street 
                                              169 
                       South Lake Tahoe-Sandy Way 
                                               93 
                                Stockton-Hazelton 
                                              124 
                                    Thousand Oaks 
                                              156 
                        TRAFFIC, RURAL PAVED ROAD 
                                              353 
                                          Trinity 
                                               90 
                             Truckee-Fire Station 
                                              176 
                                    Ukiah-Library 
                                              122 
                                          Vallejo 
                                               97 
                          Victorville-Park Avenue 
                                              225 
                                   Visalia-Church 
                                              404 
                             Woodland-Gibson Road 
                                              112 
                    Yosemite NP - Turtleback Dome 
                                              116 
       Yosemite NP-Yosemite Village Vistor Center 
                                              174 
                                        Yuba City 
                                              114 
# Six-number summary of the 2002 daily mean PM2.5 concentrations.
summary(data2002[["Daily.Mean.PM2.5.Concentration"]])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    7.00   12.00   16.12   20.50  104.30 
# Count the 2022 records per monitoring-site name. [[ ]] extracts the
# column by its exact name (no partial matching).
table(data2022[["Site.Name"]])

                                                  
                                              593 
                                         29 Palms 
                                              414 
                                           Alpine 
                                              382 
                                          Anaheim 
                                              774 
                            Aqua Tibia Wilderness 
                                               64 
                                Arroyo Grande CDF 
                                              359 
                                       Atascadero 
                                              354 
                                    Auburn-Atwood 
                                              360 
                                            Azusa 
                                               76 
                      Bakersfield-Airport (Planz) 
                                              114 
                           Bakersfield-California 
                                              713 
                        Bakersfield-Golden / M St 
                                              330 
                                  Banning Airport 
                                              365 
                                         Big Bear 
                                              394 
                                 Bishop Tribe EMO 
                                              354 
                          Brawley-220 Main Street 
                                              364 
                            Calexico-Ethel Street 
                                              448 
                                   Camp Pendleton 
                                              357 
                         Campo Indian Reservation 
                                              362 
                                    Carmel Valley 
                                              350 
                            Chester-222 First Ave 
                                              289 
                                Chico-East Avenue 
                                              411 
                                      Chula Vista 
                                              102 
                                     Clovis-Villa 
                                              359 
                                 Colfax-City Hall 
                                              358 
                              Colusa-Sunrise Blvd 
                                              359 
                                          Compton 
                                              723 
                                          Concord 
                                              463 
                               Corcoran-Patterson 
                                              360 
                         Cortina Indian Rancheria 
                                               42 
                Crescent City-Crescent Elk School 
                                              338 
                                        Crestline 
                                              178 
                                 Davis-UCD Campus 
                                              356 
                                          Donovan 
                                              360 
           El Cajon - Lexington Elementary School 
                                              491 
                             El Centro-9th Street 
                                              363 
                        El Rio-Rio Mesa School #2 
                                              361 
                                Folsom-Natoma St. 
                                              696 
                                          Fontana 
                                              180 
                                 Fresno - Garland 
                                              542 
                                   Fresno-Foundry 
                                              363 
                                   Fresno-Pacific 
                                              352 
                                           Gilroy 
                                              349 
                                         Glendora 
                                              365 
                                           Goleta 
                                              361 
                     Grass Valley-Litton Building 
                                              346 
                                    Hanford-Irwin 
                                              361 
                                        Hollister 
                                              359 
                                Hoover Wilderness 
                                              117 
                                            Huron 
                                              360 
                                            Indio 
                                              123 
                                           Jacobs 
                                              116 
                      Joshua Tree NP - Black Rock 
                                              119 
                                Kaiser Wilderness 
                                              112 
                                           Keeler 
                                              479 
                                      King City 2 
                                              355 
                                    Lake Elsinore 
                                              365 
                     Lake Tahoe Community College 
                                              111 
                          Lakeport-S. Main Street 
                                               61 
                        Lancaster-Division Street 
                                              352 
                                    Laney College 
                                              360 
 Lassen Volcanic NP - Manzanita Lake Fire Station 
                                              107 
                      Lava Beds National Monument 
                                              112 
                                            Lebec 
                                              398 
            Lebec-Peace Valley/Frazier Park Roads 
                                              106 
                                       Lee Vining 
                                              363 
                          Lincoln-2885 Moore Road 
                                              357 
                                        Livermore 
                                              343 
                                  Lompoc H Street 
                                              361 
            Lone Pine Paiute-Shoshone Reservation 
                                              302 
                               Long Beach (North) 
                                               55 
                               Long Beach (South) 
                                              243 
                   Long Beach-Route 710 Near Road 
                                              625 
                    Los Angeles-North Main Street 
                                              911 
                                      Madera-City 
                                              360 
                                          Mammoth 
                                              215 
                                          Manteca 
                                              343 
                                    Merced-Coffee 
                                              359 
                                      Merced-M St 
                                              360 
                                            Mesa2 
                                              361 
                            Mira Loma (Van Buren) 
                                              778 
                                    Mission Viejo 
                                              108 
                              Modesto-14th Street 
                                              415 
                          Mojave - CA 58 Business 
                                              355 
                   Morongo Air Monitoring Station 
                                              385 
                           North Hollywood (NOHO) 
                                              365 
                                          Oakland 
                                              365 
                                     Oakland West 
                                              364 
                             Ojai - East Ojai Ave 
                                              360 
                       Ontario-Route 60 Near Road 
                                              705 
                                      Pala Airpad 
                                              365 
                                     Palm Springs 
                                              120 
                               Paradise - Theater 
                                              358 
                                         Pasadena 
                                              120 
                                         Pechanga 
                                              402 
                                   Pico Rivera #2 
                                              118 
Pinnacles NP - Southwest of East Entrance Station 
                                              119 
                                   Piru - Pacific 
                                              361 
                            Pleasanton - Owens Ct 
                                              361 
                    Point Reyes NS Ranger Station 
                                              116 
                                      Porterville 
                                              356 
                                          Portola 
                                              553 
                           Quincy-N Church Street 
                                              434 
             Red Bluff-Walnut St. District Office 
                                              347 
                      Redding - Health Department 
                                              377 
                                     Redwood City 
                                              356 
                                       Redwood NP 
                                              120 
                                           Reseda 
                                              482 
                                  Ridgecrest-Ward 
                                              360 
                          Roseville-N Sunrise Ave 
                                              357 
                                         Rubidoux 
                                              818 
                         Sacramento-1309 T Street 
                                              446 
                          Sacramento-Bercut Drive 
                                              359 
                        Sacramento-Del Paso Manor 
                                              703 
                                        Salinas 3 
                                              418 
                     San Andreas-Gold Strike Road 
                                              355 
                                   San Bernardino 
                                              118 
                     San Diego - Kearny Villa Rd. 
                                              172 
            San Diego - Sherman Elementary School 
                                              437 
                   San Diego -Rancho Carmel Drive 
                                              119 
                                    San Francisco 
                                              361 
                               San Jose - Jackson 
                                              485 
                           San Jose - Knox Avenue 
                                              358 
                 San Lorenzo Valley Middle School 
                                              358 
                                        San Pablo 
                                              358 
                                       San Rafael 
                                              360 
                            San Rafael Wilderness 
                                               95 
                                    Santa Barbara 
                                              351 
                                    Santa Clarita 
                                              365 
                                       Santa Cruz 
                                              352 
                                       Sebastopol 
                                              349 
        Sequoia & Kings Canyon NPs - Ash Mountain 
                                              352 
                          Sequoia NP-Ash Mountain 
                                              122 
                               Signal Hill (LBSH) 
                                              293 
                       Simi Valley-Cochran Street 
                                              714 
                                      SLO Roberto 
                                              364 
                                      Sloughhouse 
                                              355 
                       Stn.1 Big Pine Paiute site 
                                              325 
                       Stockton - University Park 
                                              351 
               Table Mountain Air Monitoring Site 
                                              339 
                         Tahoe City-Fairway Drive 
                                              356 
                                         Temecula 
                                              365 
                                    Thousand Oaks 
                                              358 
                      Torres Martinez Reservation 
                                              235 
                                    Tracy-Airport 
                                              355 
                        TRAFFIC, RURAL PAVED ROAD 
                                              357 
                                     Tranquillity 
                                              352 
                                          Trinity 
                                               50 
                             Truckee-Fire Station 
                                              650 
                                          Turlock 
                                              357 
                                    Ukiah-Library 
                                              350 
                                           Upland 
                                              365 
                                          Vallejo 
                                              720 
                          Victorville-Park Avenue 
                                              722 
                        Visalia-W. Ashland Avenue 
                                              393 
                           Weaverville-Courthouse 
                                              352 
White Mountain Research Center - Owens Valley Lab 
                                              420 
                             Willits-Blosser Lane 
                                              351 
                            Willows-Colusa Street 
                                              351 
                             Woodland-Gibson Road 
                                               59 
                    Yosemite NP - Turtleback Dome 
                                              224 
       Yosemite NP-Yosemite Village Vistor Center 
                                              356 
                                            Yreka 
                                              589 
                                        Yuba City 
                                              715 
summary(data2022$Daily.Mean.PM2.5.Concentration)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 -2.200   4.200   7.000   8.564  10.900 302.500 

There are 20 variables in each dataset. The 2002 data has 15976 observations and the 2022 data has 57761 observations. The datasets have the same variable names. In the 2022 data, 593 records have a blank site name. There is some missing data in the ‘CBSA_CODE’ variable; I will remove those missing values if I work with this variable.

2.

alldata <- rbind(data2002, data2022)

dim(alldata)
[1] 73737    20
head(alldata)
        Date Source  Site.ID POC Daily.Mean.PM2.5.Concentration    UNITS
1 01/05/2002    AQS 60010007   1                           25.1 ug/m3 LC
2 01/06/2002    AQS 60010007   1                           31.6 ug/m3 LC
3 01/08/2002    AQS 60010007   1                           21.4 ug/m3 LC
4 01/11/2002    AQS 60010007   1                           25.9 ug/m3 LC
5 01/14/2002    AQS 60010007   1                           34.5 ug/m3 LC
6 01/17/2002    AQS 60010007   1                           41.0 ug/m3 LC
  DAILY_AQI_VALUE Site.Name DAILY_OBS_COUNT PERCENT_COMPLETE AQS_PARAMETER_CODE
1              78 Livermore               1              100              88101
2              92 Livermore               1              100              88101
3              71 Livermore               1              100              88101
4              80 Livermore               1              100              88101
5              98 Livermore               1              100              88101
6             115 Livermore               1              100              88101
        AQS_PARAMETER_DESC CBSA_CODE                         CBSA_NAME
1 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
2 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
3 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
4 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
5 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
6 PM2.5 - Local Conditions     41860 San Francisco-Oakland-Hayward, CA
  STATE_CODE      STATE COUNTY_CODE  COUNTY SITE_LATITUDE SITE_LONGITUDE
1          6 California           1 Alameda      37.68753      -121.7842
2          6 California           1 Alameda      37.68753      -121.7842
3          6 California           1 Alameda      37.68753      -121.7842
4          6 California           1 Alameda      37.68753      -121.7842
5          6 California           1 Alameda      37.68753      -121.7842
6          6 California           1 Alameda      37.68753      -121.7842
tail(alldata)
            Date Source  Site.ID POC Daily.Mean.PM2.5.Concentration    UNITS
73732 12/01/2022    AQS 61131003   1                            3.4 ug/m3 LC
73733 12/07/2022    AQS 61131003   1                            3.8 ug/m3 LC
73734 12/13/2022    AQS 61131003   1                            6.0 ug/m3 LC
73735 12/19/2022    AQS 61131003   1                           34.8 ug/m3 LC
73736 12/25/2022    AQS 61131003   1                           23.2 ug/m3 LC
73737 12/31/2022    AQS 61131003   1                            1.0 ug/m3 LC
      DAILY_AQI_VALUE            Site.Name DAILY_OBS_COUNT PERCENT_COMPLETE
73732              14 Woodland-Gibson Road               1              100
73733              16 Woodland-Gibson Road               1              100
73734              25 Woodland-Gibson Road               1              100
73735              99 Woodland-Gibson Road               1              100
73736              74 Woodland-Gibson Road               1              100
73737               4 Woodland-Gibson Road               1              100
      AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
73732              88101 PM2.5 - Local Conditions     40900
73733              88101 PM2.5 - Local Conditions     40900
73734              88101 PM2.5 - Local Conditions     40900
73735              88101 PM2.5 - Local Conditions     40900
73736              88101 PM2.5 - Local Conditions     40900
73737              88101 PM2.5 - Local Conditions     40900
                                    CBSA_NAME STATE_CODE      STATE COUNTY_CODE
73732 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
73733 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
73734 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
73735 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
73736 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
73737 Sacramento--Roseville--Arden-Arcade, CA          6 California         113
      COUNTY SITE_LATITUDE SITE_LONGITUDE
73732   Yolo      38.66121      -121.7327
73733   Yolo      38.66121      -121.7327
73734   Yolo      38.66121      -121.7327
73735   Yolo      38.66121      -121.7327
73736   Yolo      38.66121      -121.7327
73737   Yolo      38.66121      -121.7327
colnames(alldata)
 [1] "Date"                           "Source"                        
 [3] "Site.ID"                        "POC"                           
 [5] "Daily.Mean.PM2.5.Concentration" "UNITS"                         
 [7] "DAILY_AQI_VALUE"                "Site.Name"                     
 [9] "DAILY_OBS_COUNT"                "PERCENT_COMPLETE"              
[11] "AQS_PARAMETER_CODE"             "AQS_PARAMETER_DESC"            
[13] "CBSA_CODE"                      "CBSA_NAME"                     
[15] "STATE_CODE"                     "STATE"                         
[17] "COUNTY_CODE"                    "COUNTY"                        
[19] "SITE_LATITUDE"                  "SITE_LONGITUDE"                
any(is.na(alldata))
[1] TRUE
# Flag every column that contains at least one NA. vapply() visits the data
# frame one column at a time and always returns a named logical vector,
# whereas apply() would first coerce the whole data frame to a matrix.
col_has_missing <- vapply(alldata, anyNA, logical(1))
colnames(alldata)[col_has_missing]
[1] "CBSA_CODE"
# Parse the month/day/year strings into Date objects.
alldata[["Date"]] <- as.Date(alldata[["Date"]], format = "%m/%d/%Y")

# Extract the four-digit year from each date and store it as a number.
alldata[["Year"]] <- as.numeric(format(alldata[["Date"]], "%Y"))

# Give the PM2.5 concentration column a shorter name.
names(alldata) <- replace(
  names(alldata),
  names(alldata) == "Daily.Mean.PM2.5.Concentration",
  "DailyPM2.5"
)

I combined the two datasets into one and created a new ‘Year’ variable from the date. I made sure Year was a numeric variable and checked that the combined data looked right. I renamed the daily mean PM2.5 concentration variable to ‘DailyPM2.5’.

3.

library(leaflet)


# Map each monitoring site once per year. The daily records repeat every
# site's coordinates for each measurement, so plotting all rows of `alldata`
# would stack tens of thousands of identical markers on top of each other.
site_locations <- unique(
  alldata[, c("Year", "Site.Name", "SITE_LATITUDE", "SITE_LONGITUDE")]
)

# Vectorised colour assignment: green for 2002 sites, blue for every other
# year (2022 in this data), matching the original colour scheme.
site_locations$marker_color <- ifelse(
  site_locations$Year == 2002, "green", "blue"
)

sitemap <- leaflet(site_locations) %>%
  addTiles() %>%
  addCircleMarkers(
    lat = ~SITE_LATITUDE, lng = ~SITE_LONGITUDE,
    radius = 5,
    color = ~marker_color,
    fill = TRUE,
    fillOpacity = 0.7,
    # Build a text label for the popup rather than passing the numeric year.
    popup = ~paste0(Site.Name, " (", Year, ")")
  ) %>%
  # Explain the marker colours on the map itself.
  addLegend(
    position = "bottomright",
    colors = c("green", "blue"),
    labels = c("2002", "2022"),
    title = "Year"
  )

sitemap

The monitoring sites are well distributed across California. There seem to be more sites along the coastline than in the middle of the state.

4.

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:data.table':

    between, first, last
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Count the NA entries in the DailyPM2.5 column and print the total.
missing_values <- sum(is.na(alldata$DailyPM2.5))
cat("Number of missing values in DailyPM2.5:", missing_values, "\n")
Number of missing values in DailyPM2.5: 0 
summary(alldata$DailyPM2.5)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   -2.2     4.5     7.7    10.2    12.4   302.5 
# Total number of daily records, used as the denominator further down.
total_values <- nrow(alldata)

# A reading counts as implausible when it is negative or above 300.
# Rows where the comparison is NA are not counted.
implausible_values <- sum(
  with(alldata, DailyPM2.5 < 0 | DailyPM2.5 > 300),
  na.rm = TRUE
)

cat("Number of implausible values of DailyPM2.5:", implausible_values, "\n")
Number of implausible values of DailyPM2.5: 219 
cat("Proportion of implausible values:", implausible_values / total_values * 100, "%\n")
Proportion of implausible values: 0.2970015 %

There are no missing values in the daily PM2.5 concentration variable. I treated values outside the range 0–300 as implausible, based on typical PM2.5 values I saw on the EPA website; 219 observations (about 0.3%) fall outside this range. The negative values (the minimum is -2.2) do not seem plausible. The highest value is 302.5, which indicates very poor air quality but is probably plausible, even though it falls just above my cutoff of 300.

5.

By State

library(ggplot2)

# Statewide daily mean PM2.5 over time, one panel per year. Year takes only
# two values, so putting it on the x-axis collapses every observation onto
# two vertical stripes; plotting against Date shows the within-year pattern.
# stat_summary() averages all sites reporting on the same day, so the line
# has a single value per date.
ggplot(alldata, aes(x = Date, y = DailyPM2.5)) +
  stat_summary(fun = mean, geom = "line") +
  facet_wrap(~Year, scales = "free_x") +
  labs(title = "Trend in Daily PM2.5 Concentrations in California",
       x = "Date",
       y = "Daily PM2.5 Concentration")

# Every daily reading plotted against its date; semi-transparent points keep
# dense regions readable.
ggplot(alldata, aes(x = Date, y = DailyPM2.5)) +
  geom_point(alpha = 0.2) +
  facet_wrap(~Year, scales = "free_x") +
  labs(title = "Scatterplot of Daily PM2.5 Concentrations in California",
       x = "Date",
       y = "Daily PM2.5 Concentration")

# One box per year. Year is converted to a factor so the x-axis is discrete
# and shows only the two years that exist in the data.
ggplot(alldata, aes(x = factor(Year), y = DailyPM2.5, fill = factor(Year))) +
  geom_boxplot() +
  labs(title = "Boxplot of Daily PM2.5 Concentrations in California",
       x = "Year",
       y = "Daily PM2.5 Concentration") +
  scale_fill_discrete(name = "Year")

# Summary statistics of the PM2.5 readings for each year.
summary_2002 <- with(alldata, summary(DailyPM2.5[Year == 2002]))
summary_2022 <- with(alldata, summary(DailyPM2.5[Year == 2022]))
 
# Compare summary statistics
summary_2002
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    7.00   12.00   16.12   20.50  104.30 
summary_2022
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 -2.200   4.200   7.000   8.564  10.900 302.500 

By County

# Shared scaffold for the county-level plots: county on the x-axis, PM2.5 on
# the y-axis, fill colour by year, and tilted axis labels so the county
# names do not overlap.
county_base <- ggplot(
  alldata,
  aes(x = COUNTY, y = DailyPM2.5, fill = factor(Year))
) +
  labs(x = "COUNTY", fill = "Year") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Mean PM2.5 per county, with the two years drawn side by side.
county_base +
  geom_bar(
    stat = "summary",
    fun = "mean",
    position = position_dodge(width = 0.8)
  ) +
  labs(
    title = "Mean PM2.5 Concentration by County and Year (2002 vs. 2022)",
    y = "Mean PM2.5 Concentration"
  )

# Distribution of the daily readings per county and year.
county_base +
  geom_violin() +
  labs(
    title = "PM2.5 Concentration Distribution by County and Year (2002 vs. 2022)",
    y = "PM2.5 Concentration"
  )

By Sites in Los Angeles

# Keep only the records from Los Angeles County.
la_county_data <- filter(alldata, COUNTY == "Los Angeles")

# Shared scaffold for the site-level plots: site on the x-axis, PM2.5 on the
# y-axis, fill colour by year, and tilted axis labels so the site names do
# not overlap.
la_base <- ggplot(
  la_county_data,
  aes(x = Site.Name, y = DailyPM2.5, fill = factor(Year))
) +
  labs(x = "Site Name", fill = "Year") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Mean PM2.5 per site, with the two years drawn side by side.
la_base +
  geom_bar(
    stat = "summary",
    fun = "mean",
    position = position_dodge(width = 0.8)
  ) +
  labs(
    title = "Mean PM2.5 Concentration in Los Angeles County Sites (2002 vs. 2022)",
    y = "Mean DailyPM2.5 Concentration"
  )

# Distribution of the daily readings per site and year.
la_base +
  geom_violin() +
  labs(
    title = "PM2.5 Concentration Distribution in Los Angeles County Sites (2002 vs. 2022)",
    y = "DailyPM2.5 Concentration"
  )

I created exploratory plots of PM2.5 concentration in 2002 and 2022 by state, by county, and by site in Los Angeles County. I also compared summary statistics: the mean and median were lower in 2022 (mean 8.56, median 7.0) than in 2002 (mean 16.12, median 12.0), indicating that typical PM2.5 concentrations decreased over the 20 years, although the maximum was higher in 2022 (302.5 vs. 104.3). Based on my plots, 2002 also had higher concentrations by county. I then looked at concentrations just in Los Angeles County and noticed that they were higher in 2002 as well.